This page explores trends in the number of COVID-19 cases in the US.
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.5 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.4 ✓ stringr 1.4.0
## ✓ readr 2.0.2 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(rvest)
##
## Attaching package: 'rvest'
## The following object is masked from 'package:readr':
##
## guess_encoding
library(ggplot2)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
Import data
# Cumulative case counts: read the CSV (skipping its first two lines) and
# convert the column headers to snake_case names via janitor.
covid_cum = janitor::clean_names(
  read_csv("data/covid_cumulative_cases.csv", skip = 2)
)
## Rows: 657 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): State, Date
## dbl (1): Total Cases
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Daily new-case counts: read the CSV (skipping its first two lines) and
# convert the column headers to snake_case names via janitor.
covid_day = janitor::clean_names(
  read_csv("data/covid_daily_cases.csv", skip = 2)
)
## Rows: 656 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): State, Date
## dbl (3): New Cases, 7-Day Moving Avg, Historic Cases
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# One row per day with both the new-case count and the cumulative total.
# The text `date` column is split into month / day / year pieces, the month is
# turned into a factor ordered Jan..Dec, and a real Date column is rebuilt from
# the pieces. The result is sorted chronologically and left grouped by
# year and month.
covid_daily = covid_day %>%
  left_join(covid_cum, by = "date") %>%
  select(date, new_cases, total_cases) %>%
  separate(date, into = c("month", "day", "year")) %>%
  mutate(
    month = factor(month, levels = month.abb),
    # NOTE(review): make_date() is handed a factor month and character
    # year/day; this presumably relies on integer coercion (factor -> level
    # index 1..12) -- confirm against the lubridate version in use.
    date = make_date(year, month, day)
  ) %>%
  # `day` is still character here, so this sort is lexical; the sort on
  # `date` below determines the final row order.
  arrange(year, month, day) %>%
  arrange(date) %>%
  group_by(year, month)
# Total new cases per calendar month (one row per year/month pair).
# The grouping is stated explicitly instead of being inherited from
# `covid_daily`, so the totals stay per-month even if the upstream grouping
# is ever removed or changed. `.groups = "drop_last"` keeps the result grouped
# by `year` (the same as summarize()'s default) without the informational
# message.
covid_monthly = covid_daily %>%
  group_by(year, month) %>%
  summarize(monthly = sum(new_cases), .groups = "drop_last")
## `summarise()` has grouped output by 'year'. You can override using the `.groups` argument.
# Quarterly case totals, built from the monthly totals.
# Each month abbreviation is mapped to its calendar quarter
# (Jan-Mar -> Q1, Apr-Jun -> Q2, Jul-Sep -> Q3, Oct-Dec -> Q4) through a named
# lookup vector spliced into recode(). The monthly totals are then summed per
# year and quarter, and a "<year> - <quarter>" text label is added in `date`.
covid_seasonal = covid_monthly %>%
  mutate(
    quarter = recode(
      month,
      !!!setNames(rep(c("Q1", "Q2", "Q3", "Q4"), each = 3), month.abb)
    )
  ) %>%
  group_by(year, quarter) %>%
  summarize(quarterly = sum(monthly)) %>%
  mutate(date = paste(year, "-", quarter))
## `summarise()` has grouped output by 'year'. You can override using the `.groups` argument.
Plots: Daily
# Dual-axis daily figure: bars for new cases (left axis) and a line for
# cumulative cases (right axis, overlaid on the same x axis).
# `covid_daily` is grouped by year and month, and plotly draws each dplyr
# group as its own line segment, which would break the cumulative line at
# every month boundary. The grouping is therefore dropped before plotting.
daily_fig = plot_ly(dplyr::ungroup(covid_daily))
daily_fig %>%
  add_trace(
    x = ~date, y = ~new_cases,
    type = "bar", yaxis = "y", name = "new"
  ) %>%
  add_trace(
    x = ~date, y = ~total_cases,
    type = "scatter", mode = "lines", yaxis = "y2", name = "cumulative"
  ) %>%
  layout(
    yaxis = list(title = "daily new cases", side = "left"),
    yaxis2 = list(title = "cumulative cases", side = "right", overlaying = "y"),
    showlegend = TRUE
  )
Quarterly (to correspond with the consumption data)
# Bar chart of quarterly case totals, one bar per "<year> - <quarter>" label.
plot_ly(covid_seasonal, x = ~date, y = ~quarterly, type = "bar")